// Package roaring is an implementation of Roaring Bitmaps in Go. // They provide fast compressed bitmap data structures (also called bitset). // They are ideally suited to represent sets of integers over // relatively small ranges. // See http://roaringbitmap.org for details.
package roaring import ( ) // Bitmap represents a compressed bitmap where you can add integers. type Bitmap struct { highlowcontainer roaringArray } // ToBase64 serializes a bitmap as Base64 func ( *Bitmap) () (string, error) { := new(bytes.Buffer) , := .WriteTo() return base64.StdEncoding.EncodeToString(.Bytes()), } // FromBase64 deserializes a bitmap from Base64 func ( *Bitmap) ( string) (int64, error) { , := base64.StdEncoding.DecodeString() if != nil { return 0, } := bytes.NewBuffer() return .ReadFrom() } // WriteTo writes a serialized version of this bitmap to stream. // The format is compatible with other RoaringBitmap // implementations (Java, C) and is documented here: // https://github.com/RoaringBitmap/RoaringFormatSpec func ( *Bitmap) ( io.Writer) (int64, error) { return .highlowcontainer.writeTo() } // ToBytes returns an array of bytes corresponding to what is written // when calling WriteTo func ( *Bitmap) () ([]byte, error) { return .highlowcontainer.toBytes() } const wordSize = uint64(64) const log2WordSize = uint64(6) const capacity = ^uint64(0) const bitmapContainerSize = (1 << 16) / 64 // bitmap size in words // DenseSize returns the size of the bitmap when stored as a dense bitmap. func ( *Bitmap) () uint64 { if .highlowcontainer.size() == 0 { return 0 } := 1 + uint64(.Maximum()) if > (capacity - wordSize + 1) { return uint64(capacity >> log2WordSize) } return uint64(( + (wordSize - 1)) >> log2WordSize) } // ToDense returns a slice of uint64s representing the bitmap as a dense bitmap. // Useful to convert a roaring bitmap to a format that can be used by other libraries // like https://github.com/bits-and-blooms/bitset or https://github.com/kelindar/bitmap func ( *Bitmap) () []uint64 { := .DenseSize() if == 0 { return nil } := make([]uint64, ) .WriteDenseTo() return } // FromDense creates a bitmap from a slice of uint64s representing the bitmap as a dense bitmap. // Useful to convert bitmaps from libraries like https://github.com/bits-and-blooms/bitset or // https://github.com/kelindar/bitmap into roaring bitmaps fast and with convenience. // // This function will not create any run containers, only array and bitmap containers. It's up to // the caller to call RunOptimize if they want to further compress the runs of consecutive values. // // When doCopy is true, the bitmap is copied into a new slice for each bitmap container. // This is useful when the bitmap is going to be modified after this function returns or if it's // undesirable to hold references to large bitmaps which the GC would not be able to collect. // One copy can still happen even when doCopy is false if the bitmap length is not divisible // by bitmapContainerSize. // // See also FromBitSet. func ( []uint64, bool) *Bitmap { := (len() + bitmapContainerSize - 1) / bitmapContainerSize // round up := &Bitmap{ highlowcontainer: roaringArray{ containers: make([]container, 0, ), keys: make([]uint16, 0, ), needCopyOnWrite: make([]bool, 0, ), }, } .FromDense(, ) return } // FromDense unmarshalls from a slice of uint64s representing the bitmap as a dense bitmap. // Useful to convert bitmaps from libraries like https://github.com/bits-and-blooms/bitset or // https://github.com/kelindar/bitmap into roaring bitmaps fast and with convenience. // Callers are responsible for ensuring that the bitmap is empty before calling this function. // // This function will not create any run containers, only array and bitmap containers. It is up to // the caller to call RunOptimize if they want to further compress the runs of consecutive values. // // When doCopy is true, the bitmap is copied into a new slice for each bitmap container. // This is useful when the bitmap is going to be modified after this function returns or if it's // undesirable to hold references to large bitmaps which the GC would not be able to collect. // One copy can still happen even when doCopy is false if the bitmap length is not divisible // by bitmapContainerSize. // // See FromBitSet. func ( *Bitmap) ( []uint64, bool) { if len() == 0 { return } var uint16 const = bitmapContainerSize for len() > 0 { := if len() < { = len() } := [:] := int(popcntSlice()) switch { case > arrayDefaultMaxSize: := &bitmapContainer{cardinality: , bitmap: } := true if || len() < { .bitmap = make([]uint64, ) copy(.bitmap, ) = false } .highlowcontainer.appendContainer(, , ) case > 0: := &arrayContainer{content: make([]uint16, )} var , int for , := range { for != 0 { := & - .content[] = uint16( + int(popcount(-1))) ++ ^= } += 64 } .highlowcontainer.appendContainer(, , false) } = [:] ++ } } // WriteDenseTo writes to a slice of uint64s representing the bitmap as a dense bitmap. // Callers are responsible for allocating enough space in the bitmap using DenseSize. // Useful to convert a roaring bitmap to a format that can be used by other libraries // like https://github.com/bits-and-blooms/bitset or https://github.com/kelindar/bitmap func ( *Bitmap) ( []uint64) { for , := range .highlowcontainer.containers { := uint32(.highlowcontainer.keys[]) << 16 switch c := .(type) { case *arrayContainer: for , := range .content { := int( | uint32()) [>>log2WordSize] |= uint64(1) << uint(%64) } case *bitmapContainer: copy([int()>>log2WordSize:], .bitmap) case *runContainer16: for := range .iv { := uint32(.iv[].start) := + uint32(.iv[].length) + 1 := int(|) >> log2WordSize := int(|(-1)) >> log2WordSize if == { [] |= (^uint64(0) << uint(%64)) & (^uint64(0) >> (uint(-) % 64)) continue } [] |= ^uint64(0) << uint(%64) for := + 1; < ; ++ { [] = ^uint64(0) } [] |= ^uint64(0) >> (uint(-) % 64) } default: panic("unsupported container type") } } } // Checksum computes a hash (currently FNV-1a) for a bitmap that is suitable for // using bitmaps as elements in hash sets or as keys in hash maps, as well as // generally quicker comparisons. // The implementation is biased towards efficiency in little endian machines, so // expect some extra CPU cycles and memory to be used if your machine is big endian. // Likewise, do not use this to verify integrity unless you are certain you will load // the bitmap on a machine with the same endianess used to create it. (Thankfully // very few people use big endian machines these days.) func ( *Bitmap) () uint64 { const ( = 14695981039346656037 = 1099511628211 ) var []byte := uint64() = uint16SliceAsByteSlice(.highlowcontainer.keys) for , := range { ^= uint64() *= } for , := range .highlowcontainer.containers { // 0 separator ^= 0 *= switch c := .(type) { case *bitmapContainer: = uint64SliceAsByteSlice(.bitmap) case *arrayContainer: = uint16SliceAsByteSlice(.content) case *runContainer16: = interval16SliceAsByteSlice(.iv) default: panic("invalid container type") } if len() == 0 { panic("empty containers are not supported") } for , := range { ^= uint64() *= } } return } // FromUnsafeBytes reads a serialized version of this bitmap from the byte buffer without copy. // It is the caller's responsibility to ensure that the input data is not modified and remains valid for the entire lifetime of this bitmap. // This method avoids small allocations but holds references to the input data buffer. It is GC-friendly, but it may consume more memory eventually. // The containers in the resulting bitmap are immutable containers tied to the provided byte array and they rely on // copy-on-write which means that modifying them creates copies. Thus FromUnsafeBytes is more likely to be appropriate for read-only use cases, // when the resulting bitmap can be considered immutable. // // See also the FromBuffer function. // See https://github.com/RoaringBitmap/roaring/pull/395 for more details. func ( *Bitmap) ( []byte, ...byte) ( int64, error) { := internal.NewByteBuffer() return .ReadFrom() } // ReadFrom reads a serialized version of this bitmap from stream. // The format is compatible with other RoaringBitmap // implementations (Java, C) and is documented here: // https://github.com/RoaringBitmap/RoaringFormatSpec // Since io.Reader is regarded as a stream and cannot be read twice. // So add cookieHeader to accept the 4-byte data that has been read in roaring64.ReadFrom. // It is not necessary to pass cookieHeader when call roaring.ReadFrom to read the roaring32 data directly. func ( *Bitmap) ( io.Reader, ...byte) ( int64, error) { , := .(internal.ByteInput) if ! { := internal.ByteInputAdapterPool.Get().(*internal.ByteInputAdapter) .Reset() = } , = .highlowcontainer.readFrom(, ...) if ! { internal.ByteInputAdapterPool.Put(.(*internal.ByteInputAdapter)) } return } // FromBuffer creates a bitmap from its serialized version stored in buffer // // The format specification is available here: // https://github.com/RoaringBitmap/RoaringFormatSpec // // The provided byte array (buf) is expected to be a constant. // The function makes the best effort attempt not to copy data. // You should take care not to modify buff as it will // likely result in unexpected program behavior. // // Resulting bitmaps are effectively immutable in the following sense: // a copy-on-write marker is used so that when you modify the resulting // bitmap, copies of selected data (containers) are made. // You should *not* change the copy-on-write status of the resulting // bitmaps (SetCopyOnWrite). // // Thus FromBuffer is more likely to be appropriate for read-only use cases, // when the resulting bitmap can be considered immutable. // // If buf becomes unavailable, then a bitmap created with // FromBuffer would be effectively broken. Furthermore, any // bitmap derived from this bitmap (e.g., via Or, And) might // also be broken. Thus, before making buf unavailable, you should // call CloneCopyOnWriteContainers on all such bitmaps. // // See also the FromUnsafeBytes function which can have better performance // in some cases. func ( *Bitmap) ( []byte) ( int64, error) { := internal.ByteBufferPool.Get().(*internal.ByteBuffer) .Reset() , = .highlowcontainer.readFrom() internal.ByteBufferPool.Put() return } // RunOptimize attempts to further compress the runs of consecutive values found in the bitmap func ( *Bitmap) () { .highlowcontainer.runOptimize() } // HasRunCompression returns true if the bitmap benefits from run compression func ( *Bitmap) () bool { return .highlowcontainer.hasRunCompression() } // MarshalBinary implements the encoding.BinaryMarshaler interface for the bitmap // (same as ToBytes) func ( *Bitmap) () ([]byte, error) { return .ToBytes() } // UnmarshalBinary implements the encoding.BinaryUnmarshaler interface for the bitmap func ( *Bitmap) ( []byte) error { := bytes.NewReader() , := .ReadFrom() return } // NewBitmap creates a new empty Bitmap (see also New) func () *Bitmap { return &Bitmap{} } // New creates a new empty Bitmap (same as NewBitmap) func () *Bitmap { return &Bitmap{} } // Clear resets the Bitmap to be logically empty, but may retain // some memory allocations that may speed up future operations func ( *Bitmap) () { .highlowcontainer.clear() } // ToBitSet copies the content of the RoaringBitmap into a bitset.BitSet instance func ( *Bitmap) () *bitset.BitSet { return bitset.From(.ToDense()) } // FromBitSet creates a new RoaringBitmap from a bitset.BitSet instance func ( *bitset.BitSet) *Bitmap { return FromDense(.Bytes(), false) } // ToArray creates a new slice containing all of the integers stored in the Bitmap in sorted order func ( *Bitmap) () []uint32 { := make([]uint32, .GetCardinality()) := 0 := 0 for < .highlowcontainer.size() { := uint32(.highlowcontainer.getKeyAtIndex()) << 16 := .highlowcontainer.getContainerAtIndex() ++ = .fillLeastSignificant16bits(, , ) } return } // GetSizeInBytes estimates the memory usage of the Bitmap. Note that this // might differ slightly from the amount of bytes required for persistent storage func ( *Bitmap) () uint64 { := uint64(8) for , := range .highlowcontainer.containers { += uint64(2) + uint64(.getSizeInBytes()) } return } // GetSerializedSizeInBytes computes the serialized size in bytes // of the Bitmap. It should correspond to the // number of bytes written when invoking WriteTo. You can expect // that this function is much cheaper computationally than WriteTo. func ( *Bitmap) () uint64 { return .highlowcontainer.serializedSizeInBytes() } // BoundSerializedSizeInBytes returns an upper bound on the serialized size in bytes // assuming that one wants to store "cardinality" integers in [0, universe_size) func ( uint64, uint64) uint64 { := ( + uint64(65535)) / uint64(65536) if > { = // we cannot have more containers than we have values } := 8* + 4 if 4 > (+7)/8 { += 4 } else { += ( + 7) / 8 } := uint64(arrayContainerSizeInBytes(int())) := * uint64(bitmapContainerSizeInBytes()) := if > { = } return + } // IntIterable allows you to iterate over the values in a Bitmap type IntIterable interface { HasNext() bool Next() uint32 } // IntPeekable allows you to look at the next value without advancing and // advance as long as the next value is smaller than minval type IntPeekable interface { IntIterable // PeekNext peeks the next value without advancing the iterator PeekNext() uint32 // AdvanceIfNeeded advances as long as the next value is smaller than minval AdvanceIfNeeded(minval uint32) } type intIterator struct { pos int hs uint32 iter shortPeekable highlowcontainer *roaringArray // These embedded iterators per container type help reduce load in the GC. // This way, instead of making up-to 64k allocations per full iteration // we get a single allocation and simply reinitialize the appropriate // iterator and point to it in the generic `iter` member on each key bound. shortIter shortIterator runIter runIterator16 bitmapIter bitmapContainerShortIterator } // HasNext returns true if there are more integers to iterate over func ( *intIterator) () bool { return .pos < .highlowcontainer.size() } func ( *intIterator) () { if .highlowcontainer.size() > .pos { .hs = uint32(.highlowcontainer.getKeyAtIndex(.pos)) << 16 := .highlowcontainer.getContainerAtIndex(.pos) switch t := .(type) { case *arrayContainer: .shortIter = shortIterator{.content, 0} .iter = &.shortIter case *runContainer16: .runIter = runIterator16{rc: , curIndex: 0, curPosInIndex: 0} .iter = &.runIter case *bitmapContainer: .bitmapIter = bitmapContainerShortIterator{, .NextSetBit(0)} .iter = &.bitmapIter } } } // Next returns the next integer func ( *intIterator) () uint32 { := uint32(.iter.next()) | .hs if !.iter.hasNext() { .pos = .pos + 1 .init() } return } // PeekNext peeks the next value without advancing the iterator func ( *intIterator) () uint32 { return uint32(.iter.peekNext()&maxLowBit) | .hs } // AdvanceIfNeeded advances as long as the next value is smaller than minval func ( *intIterator) ( uint32) { := & 0xffff0000 for .HasNext() && .hs < { .pos++ .init() } if .HasNext() && .hs == { .iter.advanceIfNeeded(lowbits()) if !.iter.hasNext() { .pos++ .init() } } } // IntIterator is meant to allow you to iterate through the values of a bitmap, see Initialize(a *Bitmap) type IntIterator = intIterator // Initialize configures the existing iterator so that it can iterate through the values of // the provided bitmap. // The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove). func ( *intIterator) ( *Bitmap) { .pos = 0 .highlowcontainer = &.highlowcontainer .init() } type intReverseIterator struct { pos int hs uint32 iter shortIterable highlowcontainer *roaringArray shortIter reverseIterator runIter runReverseIterator16 bitmapIter reverseBitmapContainerShortIterator } // HasNext returns true if there are more integers to iterate over func ( *intReverseIterator) () bool { return .pos >= 0 } func ( *intReverseIterator) () { if .pos >= 0 { .hs = uint32(.highlowcontainer.getKeyAtIndex(.pos)) << 16 := .highlowcontainer.getContainerAtIndex(.pos) switch t := .(type) { case *arrayContainer: .shortIter = reverseIterator{.content, len(.content) - 1} .iter = &.shortIter case *runContainer16: := int(len(.iv)) - 1 := uint16(0) if >= 0 { = .iv[].length } .runIter = runReverseIterator16{rc: , curIndex: , curPosInIndex: } .iter = &.runIter case *bitmapContainer: := -1 if .cardinality > 0 { = int(.maximum()) } .bitmapIter = reverseBitmapContainerShortIterator{, } .iter = &.bitmapIter } } else { .iter = nil } } // Next returns the next integer func ( *intReverseIterator) () uint32 { := uint32(.iter.next()) | .hs if !.iter.hasNext() { .pos = .pos - 1 .init() } return } // IntReverseIterator is meant to allow you to iterate through the values of a bitmap, see Initialize(a *Bitmap) type IntReverseIterator = intReverseIterator // Initialize configures the existing iterator so that it can iterate through the values of // the provided bitmap. // The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove). func ( *intReverseIterator) ( *Bitmap) { .highlowcontainer = &.highlowcontainer .pos = .highlowcontainer.size() - 1 .init() } // ManyIntIterable allows you to iterate over the values in a Bitmap type ManyIntIterable interface { // NextMany fills buf up with values, returns how many values were returned NextMany(buf []uint32) int // NextMany64 fills up buf with 64 bit values, uses hs as a mask (OR), returns how many values were returned NextMany64(hs uint64, buf []uint64) int } type manyIntIterator struct { pos int hs uint32 iter manyIterable highlowcontainer *roaringArray shortIter shortIterator runIter runIterator16 bitmapIter bitmapContainerManyIterator } func ( *manyIntIterator) () { if .highlowcontainer.size() > .pos { .hs = uint32(.highlowcontainer.getKeyAtIndex(.pos)) << 16 := .highlowcontainer.getContainerAtIndex(.pos) switch t := .(type) { case *arrayContainer: .shortIter = shortIterator{.content, 0} .iter = &.shortIter case *runContainer16: .runIter = runIterator16{rc: , curIndex: 0, curPosInIndex: 0} .iter = &.runIter case *bitmapContainer: .bitmapIter = bitmapContainerManyIterator{, -1, 0} .iter = &.bitmapIter } } else { .iter = nil } } func ( *manyIntIterator) ( []uint32) int { := 0 for < len() { if .iter == nil { break } := .iter.nextMany(.hs, [:]) += if == 0 { .pos = .pos + 1 .init() } } return } func ( *manyIntIterator) ( uint64, []uint64) int { := 0 for < len() { if .iter == nil { break } := uint64(.hs) | := .iter.nextMany64(, [:]) += if == 0 { .pos = .pos + 1 .init() } } return } // ManyIntIterator is meant to allow you to iterate through the values of a bitmap, see Initialize(a *Bitmap) type ManyIntIterator = manyIntIterator // Initialize configures the existing iterator so that it can iterate through the values of // the provided bitmap. // The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove). func ( *manyIntIterator) ( *Bitmap) { .pos = 0 .highlowcontainer = &.highlowcontainer .init() } // String creates a string representation of the Bitmap func ( *Bitmap) () string { // inspired by https://github.com/fzandona/goroar/ var bytes.Buffer := []byte("{") .Write() := .Iterator() := 0 if .HasNext() { = + 1 .WriteString(strconv.FormatInt(int64(.Next()), 10)) } for .HasNext() { .WriteString(",") = + 1 // to avoid exhausting the memory if > 0x40000 { .WriteString("...") break } .WriteString(strconv.FormatInt(int64(.Next()), 10)) } .WriteString("}") return .String() } // Iterate iterates over the bitmap, calling the given callback with each value in the bitmap. If the callback returns // false, the iteration is halted. // The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove). // There is no guarantee as to what order the values will be iterated. func ( *Bitmap) ( func( uint32) bool) { for := 0; < .highlowcontainer.size(); ++ { := uint32(.highlowcontainer.getKeyAtIndex()) << 16 := .highlowcontainer.getContainerAtIndex() var bool // This is hacky but it avoids allocations from invoking an interface method with a closure switch t := .(type) { case *arrayContainer: = .iterate(func( uint16) bool { return (uint32() | ) }) case *runContainer16: = .iterate(func( uint16) bool { return (uint32() | ) }) case *bitmapContainer: = .iterate(func( uint16) bool { return (uint32() | ) }) } if ! { break } } } // Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order; // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). func ( *Bitmap) () IntPeekable { := new(intIterator) .Initialize() return } // ReverseIterator creates a new IntIterable to iterate over the integers contained in the bitmap, in sorted order; // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). func ( *Bitmap) () IntIterable { := new(intReverseIterator) .Initialize() return } // ManyIterator creates a new ManyIntIterable to iterate over the integers contained in the bitmap, in sorted order; // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove). func ( *Bitmap) () ManyIntIterable { := new(manyIntIterator) .Initialize() return } // Clone creates a copy of the Bitmap func ( *Bitmap) () *Bitmap { := new(Bitmap) .highlowcontainer = *.highlowcontainer.clone() return } // Minimum get the smallest value stored in this roaring bitmap, assumes that it is not empty func ( *Bitmap) () uint32 { if len(.highlowcontainer.containers) == 0 { panic("Empty bitmap") } return uint32(.highlowcontainer.containers[0].minimum()) | (uint32(.highlowcontainer.keys[0]) << 16) } // Maximum get the largest value stored in this roaring bitmap, assumes that it is not empty func ( *Bitmap) () uint32 { if len(.highlowcontainer.containers) == 0 { panic("Empty bitmap") } := len(.highlowcontainer.containers) - 1 return uint32(.highlowcontainer.containers[].maximum()) | (uint32(.highlowcontainer.keys[]) << 16) } // Contains returns true if the integer is contained in the bitmap func ( *Bitmap) ( uint32) bool { := highbits() := .highlowcontainer.getContainer() return != nil && .contains(lowbits()) } // ContainsInt returns true if the integer is contained in the bitmap (this is a convenience method, the parameter is casted to uint32 and Contains is called) func ( *Bitmap) ( int) bool { return .Contains(uint32()) } // Equals returns true if the two bitmaps contain the same integers func ( *Bitmap) ( interface{}) bool { , := .(*Bitmap) if { return .highlowcontainer.equals(.highlowcontainer) } return false } // AddOffset adds the value 'offset' to each and every value in a bitmap, generating a new bitmap in the process func ( *Bitmap, uint32) ( *Bitmap) { return AddOffset64(, int64()) } // AddOffset64 adds the value 'offset' to each and every value in a bitmap, generating a new bitmap in the process // If offset + element is outside of the range [0,2^32), that the element will be dropped func ( *Bitmap, int64) ( *Bitmap) { // we need "offset" to be a long because we want to support values // between -0xFFFFFFFF up to +-0xFFFFFFFF var int64 if < 0 { = ( - (1 << 16) + 1) / (1 << 16) } else { = >> 16 } = New() if >= (1<<16) || < -(1<<16) { return } := int32() := (uint16)( - *(1<<16)) if == 0 { for := 0; < .highlowcontainer.size(); ++ { := int32(.highlowcontainer.getKeyAtIndex()) += if >= 0 && <= MaxUint16 { := .highlowcontainer.getContainerAtIndex().clone() .highlowcontainer.appendContainer(uint16(), , false) } } } else { for := 0; < .highlowcontainer.size(); ++ { := int32(.highlowcontainer.getKeyAtIndex()) += if +1 < 0 || > MaxUint16 { continue } := .highlowcontainer.getContainerAtIndex() , := .addOffset() if != nil && >= 0 { := .highlowcontainer.size() := int32(0) if > 0 { = int32(.highlowcontainer.getKeyAtIndex( - 1)) } if > 0 && == { := .highlowcontainer.getContainerAtIndex( - 1) := .ior() .highlowcontainer.setContainerAtIndex(-1, ) } else { .highlowcontainer.appendContainer(uint16(), , false) } } if != nil && +1 <= MaxUint16 { .highlowcontainer.appendContainer(uint16(+1), , false) } } } return } // Add the integer x to the bitmap func ( *Bitmap) ( uint32) { := highbits() := &.highlowcontainer := .getIndex() if >= 0 { var container = .getWritableContainerAtIndex().iaddReturnMinimized(lowbits()) .highlowcontainer.setContainerAtIndex(, ) } else { := newArrayContainer() .highlowcontainer.insertNewKeyValueAt(--1, , .iaddReturnMinimized(lowbits())) } } // add the integer x to the bitmap, return the container and its index func ( *Bitmap) ( uint32) (int, container) { := highbits() := &.highlowcontainer := .getIndex() var container if >= 0 { = .getWritableContainerAtIndex().iaddReturnMinimized(lowbits()) .highlowcontainer.setContainerAtIndex(, ) return , } := newArrayContainer() = .iaddReturnMinimized(lowbits()) .highlowcontainer.insertNewKeyValueAt(--1, , ) return - - 1, } // CheckedAdd adds the integer x to the bitmap and return true if it was added (false if the integer was already present) func ( *Bitmap) ( uint32) bool { // TODO: add unit tests for this method := highbits() := .highlowcontainer.getIndex() if >= 0 { := .highlowcontainer.getWritableContainerAtIndex() := .getCardinality() = .iaddReturnMinimized(lowbits()) .highlowcontainer.setContainerAtIndex(, ) return .getCardinality() > } := newArrayContainer() .highlowcontainer.insertNewKeyValueAt(--1, , .iaddReturnMinimized(lowbits())) return true } // AddInt adds the integer x to the bitmap (convenience method: the parameter is casted to uint32 and we call Add) func ( *Bitmap) ( int) { .Add(uint32()) } // Remove the integer x from the bitmap func ( *Bitmap) ( uint32) { := highbits() := .highlowcontainer.getIndex() if >= 0 { := .highlowcontainer.getWritableContainerAtIndex().iremoveReturnMinimized(lowbits()) .highlowcontainer.setContainerAtIndex(, ) if .highlowcontainer.getContainerAtIndex().isEmpty() { .highlowcontainer.removeAtIndex() } } } // CheckedRemove removes the integer x from the bitmap and return true if the integer was effectively removed (and false if the integer was not present) func ( *Bitmap) ( uint32) bool { // TODO: add unit tests for this method := highbits() := .highlowcontainer.getIndex() if >= 0 { := .highlowcontainer.getWritableContainerAtIndex() := .getCardinality() = .iremoveReturnMinimized(lowbits()) .highlowcontainer.setContainerAtIndex(, ) if .highlowcontainer.getContainerAtIndex().isEmpty() { .highlowcontainer.removeAtIndex() return true } return .getCardinality() < } return false } // IsEmpty returns true if the Bitmap is empty (it is faster than doing (GetCardinality() == 0)) func ( *Bitmap) () bool { return .highlowcontainer.size() == 0 } // GetCardinality returns the number of integers contained in the bitmap func ( *Bitmap) () uint64 { := uint64(0) for , := range .highlowcontainer.containers { += uint64(.getCardinality()) } return } // Rank returns the number of integers that are smaller or equal to x (Rank(infinity) would be GetCardinality()). // If you pass the smallest value, you get the value 1. If you pass a value that is smaller than the smallest // value, you get 0. Note that this function differs in convention from the Select function since it // return 1 and not 0 on the smallest value. func ( *Bitmap) ( uint32) uint64 { := uint64(0) for := 0; < .highlowcontainer.size(); ++ { := .highlowcontainer.getKeyAtIndex() if > highbits() { return } if < highbits() { += uint64(.highlowcontainer.getContainerAtIndex().getCardinality()) } else { return + uint64(.highlowcontainer.getContainerAtIndex().rank(lowbits())) } } return } // Select returns the xth integer in the bitmap. If you pass 0, you get // the smallest element. Note that this function differs in convention from // the Rank function which returns 1 on the smallest value. func ( *Bitmap) ( uint32) (uint32, error) { := for := 0; < .highlowcontainer.size(); ++ { := .highlowcontainer.getContainerAtIndex() := uint32(.getCardinality()) if >= { -= } else { := .highlowcontainer.getKeyAtIndex() return uint32()<<16 + uint32(.selectInt(uint16())), nil } } return 0, fmt.Errorf("cannot find %dth integer in a bitmap with only %d items", , .GetCardinality()) } // And computes the intersection between two bitmaps and stores the result in the current bitmap func ( *Bitmap) ( *Bitmap) { := 0 := 0 := 0 := .highlowcontainer.size() := .highlowcontainer.size() : for { if < && < { := .highlowcontainer.getKeyAtIndex() := .highlowcontainer.getKeyAtIndex() for { if == { := .highlowcontainer.getWritableContainerAtIndex() := .highlowcontainer.getContainerAtIndex() := .iand() if !.isEmpty() { .highlowcontainer.replaceKeyAndContainerAtIndex(, , , false) ++ } ++ ++ if ( == ) || ( == ) { break } = .highlowcontainer.getKeyAtIndex() = .highlowcontainer.getKeyAtIndex() } else if < { = .highlowcontainer.advanceUntil(, ) if == { break } = .highlowcontainer.getKeyAtIndex() } else { //s1 > s2 = .highlowcontainer.advanceUntil(, ) if == { break } = .highlowcontainer.getKeyAtIndex() } } } else { break } } .highlowcontainer.resize() } // OrCardinality returns the cardinality of the union between two bitmaps, bitmaps are not modified func ( *Bitmap) ( *Bitmap) uint64 { := 0 := 0 := .highlowcontainer.size() := .highlowcontainer.size() := uint64(0) : for { if ( < ) && ( < ) { := .highlowcontainer.getKeyAtIndex() := .highlowcontainer.getKeyAtIndex() for { if < { += uint64(.highlowcontainer.getContainerAtIndex().getCardinality()) ++ if == { break } = .highlowcontainer.getKeyAtIndex() } else if > { += uint64(.highlowcontainer.getContainerAtIndex().getCardinality()) ++ if == { break } = .highlowcontainer.getKeyAtIndex() } else { // TODO: could be faster if we did not have to materialize the container += uint64(.highlowcontainer.getContainerAtIndex().or(.highlowcontainer.getContainerAtIndex()).getCardinality()) ++ ++ if ( == ) || ( == ) { break } = .highlowcontainer.getKeyAtIndex() = .highlowcontainer.getKeyAtIndex() } } } else { break } } for ; < ; ++ { += uint64(.highlowcontainer.getContainerAtIndex().getCardinality()) } for ; < ; ++ { += uint64(.highlowcontainer.getContainerAtIndex().getCardinality()) } return } // AndCardinality returns the cardinality of the intersection between two bitmaps, bitmaps are not modified func ( *Bitmap) ( *Bitmap) uint64 { := 0 := 0 := uint64(0) := .highlowcontainer.size() := .highlowcontainer.size() : for { if < && < { := .highlowcontainer.getKeyAtIndex() := .highlowcontainer.getKeyAtIndex() for { if == { := .highlowcontainer.getContainerAtIndex() := .highlowcontainer.getContainerAtIndex() += uint64(.andCardinality()) ++ ++ if ( == ) || ( == ) { break } = .highlowcontainer.getKeyAtIndex() = .highlowcontainer.getKeyAtIndex() } else if < { = .highlowcontainer.advanceUntil(, ) if == { break } = .highlowcontainer.getKeyAtIndex() } else { //s1 > s2 = .highlowcontainer.advanceUntil(, ) if == { break } = .highlowcontainer.getKeyAtIndex() } } } else { break } } return } // IntersectsWithInterval checks whether a bitmap 'rb' and an open interval '[x,y)' intersect. func ( *Bitmap) (, uint64) bool { if >= { return false } if > MaxUint32 { return false } := intIterator{} .Initialize() .AdvanceIfNeeded(uint32()) if !.HasNext() { return false } if uint64(.Next()) >= { return false } return true } // Intersects checks whether two bitmap intersects, bitmaps are not modified func ( *Bitmap) ( *Bitmap) bool { := 0 := 0 := .highlowcontainer.size() := .highlowcontainer.size() : for { if < && < { := .highlowcontainer.getKeyAtIndex() := .highlowcontainer.getKeyAtIndex() for { if == { := .highlowcontainer.getContainerAtIndex() := .highlowcontainer.getContainerAtIndex() if .intersects() { return true } ++ ++ if ( == ) || ( == ) { break } = .highlowcontainer.getKeyAtIndex() = .highlowcontainer.getKeyAtIndex() } else if < { = .highlowcontainer.advanceUntil(, ) if == { break } = .highlowcontainer.getKeyAtIndex() } else { //s1 > s2 = .highlowcontainer.advanceUntil(, ) if == { break } = .highlowcontainer.getKeyAtIndex() } } } else { break } } return false } // Xor computes the symmetric difference between two bitmaps and stores the result in the current bitmap func ( *Bitmap) ( *Bitmap) { := 0 := 0 := .highlowcontainer.size() := .highlowcontainer.size() for { if ( < ) && ( < ) { := .highlowcontainer.getKeyAtIndex() := .highlowcontainer.getKeyAtIndex() if < { = .highlowcontainer.advanceUntil(, ) if == { break } } else if > { := .highlowcontainer.getWritableContainerAtIndex() .highlowcontainer.insertNewKeyValueAt(, .highlowcontainer.getKeyAtIndex(), ) ++ ++ ++ } else { // TODO: couple be computed in-place for reduced memory usage := .highlowcontainer.getContainerAtIndex().xor(.highlowcontainer.getContainerAtIndex()) if !.isEmpty() { .highlowcontainer.setContainerAtIndex(, ) ++ } else { .highlowcontainer.removeAtIndex() -- } ++ } } else { break } } if == { .highlowcontainer.appendCopyMany(.highlowcontainer, , ) } } // Or computes the union between two bitmaps and stores the result in the current bitmap func ( *Bitmap) ( *Bitmap) { := 0 := 0 := .highlowcontainer.size() := .highlowcontainer.size() : for ( < ) && ( < ) { := .highlowcontainer.getKeyAtIndex() := .highlowcontainer.getKeyAtIndex() for { if < { ++ if == { break } = .highlowcontainer.getKeyAtIndex() } else if > { .highlowcontainer.insertNewKeyValueAt(, , .highlowcontainer.getContainerAtIndex().clone()) ++ ++ ++ if == { break } = .highlowcontainer.getKeyAtIndex() } else { .highlowcontainer.replaceKeyAndContainerAtIndex(, , .highlowcontainer.getUnionedWritableContainer(, .highlowcontainer.getContainerAtIndex()), false) ++ ++ if ( == ) || ( == ) { break } = .highlowcontainer.getKeyAtIndex() = .highlowcontainer.getKeyAtIndex() } } } if == { .highlowcontainer.appendCopyMany(.highlowcontainer, , ) } } // AndNot computes the difference between two bitmaps and stores the result in the current bitmap func ( *Bitmap) ( *Bitmap) { := 0 := 0 := 0 := .highlowcontainer.size() := .highlowcontainer.size() : for { if < && < { := .highlowcontainer.getKeyAtIndex() := .highlowcontainer.getKeyAtIndex() for { if == { := .highlowcontainer.getWritableContainerAtIndex() := .highlowcontainer.getContainerAtIndex() := .iandNot() if !.isEmpty() { .highlowcontainer.replaceKeyAndContainerAtIndex(, , , false) ++ } ++ ++ if ( == ) || ( == ) { break } = .highlowcontainer.getKeyAtIndex() = .highlowcontainer.getKeyAtIndex() } else if < { := .highlowcontainer.getContainerAtIndex() := .highlowcontainer.needsCopyOnWrite() .highlowcontainer.replaceKeyAndContainerAtIndex(, , , ) ++ ++ if == { break } = .highlowcontainer.getKeyAtIndex() } else { //s1 > s2 = .highlowcontainer.advanceUntil(, ) if == { break } = .highlowcontainer.getKeyAtIndex() } } } else { break } } // TODO:implement as a copy for < { := .highlowcontainer.getContainerAtIndex() := .highlowcontainer.getKeyAtIndex() := .highlowcontainer.needsCopyOnWrite() .highlowcontainer.replaceKeyAndContainerAtIndex(, , , ) ++ ++ } .highlowcontainer.resize() } // Or computes the union between two bitmaps and returns the result func (, *Bitmap) *Bitmap { := NewBitmap() := 0 := 0 := .highlowcontainer.size() := .highlowcontainer.size() : for ( < ) && ( < ) { := .highlowcontainer.getKeyAtIndex() := .highlowcontainer.getKeyAtIndex() for { if < { .highlowcontainer.appendCopy(.highlowcontainer, ) ++ if == { break } = .highlowcontainer.getKeyAtIndex() } else if > { .highlowcontainer.appendCopy(.highlowcontainer, ) ++ if == { break } = .highlowcontainer.getKeyAtIndex() } else { .highlowcontainer.appendContainer(, .highlowcontainer.getContainerAtIndex().or(.highlowcontainer.getContainerAtIndex()), false) ++ ++ if ( == ) || ( == ) { break } = .highlowcontainer.getKeyAtIndex() = .highlowcontainer.getKeyAtIndex() } } } if == { .highlowcontainer.appendCopyMany(.highlowcontainer, , ) } else if == { .highlowcontainer.appendCopyMany(.highlowcontainer, , ) } return } // And computes the intersection between two bitmaps and returns the result func (, *Bitmap) *Bitmap { := NewBitmap() := 0 := 0 := .highlowcontainer.size() := .highlowcontainer.size() : for < && < { := .highlowcontainer.getKeyAtIndex() := .highlowcontainer.getKeyAtIndex() for { if == { := .highlowcontainer.getContainerAtIndex() = .and(.highlowcontainer.getContainerAtIndex()) if !.isEmpty() { .highlowcontainer.appendContainer(, , false) } ++ ++ if ( == ) || ( == ) { break } = .highlowcontainer.getKeyAtIndex() = .highlowcontainer.getKeyAtIndex() } else if < { = .highlowcontainer.advanceUntil(, ) if == { break } = .highlowcontainer.getKeyAtIndex() } else { // s1 > s2 = .highlowcontainer.advanceUntil(, ) if == { break } = .highlowcontainer.getKeyAtIndex() } } } return } // Xor computes the symmetric difference between two bitmaps and returns the result func (, *Bitmap) *Bitmap { := NewBitmap() := 0 := 0 := .highlowcontainer.size() := .highlowcontainer.size() for { if ( < ) && ( < ) { := .highlowcontainer.getKeyAtIndex() := .highlowcontainer.getKeyAtIndex() if < { .highlowcontainer.appendCopy(.highlowcontainer, ) ++ } else if > { .highlowcontainer.appendCopy(.highlowcontainer, ) ++ } else { := .highlowcontainer.getContainerAtIndex().xor(.highlowcontainer.getContainerAtIndex()) if !.isEmpty() { .highlowcontainer.appendContainer(, , false) } ++ ++ } } else { break } } if == { .highlowcontainer.appendCopyMany(.highlowcontainer, , ) } else if == { .highlowcontainer.appendCopyMany(.highlowcontainer, , ) } return } // AndNot computes the difference between two bitmaps and returns the result func (, *Bitmap) *Bitmap { := NewBitmap() := 0 := 0 := .highlowcontainer.size() := .highlowcontainer.size() : for { if < && < { := .highlowcontainer.getKeyAtIndex() := .highlowcontainer.getKeyAtIndex() for { if < { .highlowcontainer.appendCopy(.highlowcontainer, ) ++ if == { break } = .highlowcontainer.getKeyAtIndex() } else if == { := .highlowcontainer.getContainerAtIndex() := .highlowcontainer.getContainerAtIndex() := .andNot() if !.isEmpty() { .highlowcontainer.appendContainer(, , false) } ++ ++ if ( == ) || ( == ) { break } = .highlowcontainer.getKeyAtIndex() = .highlowcontainer.getKeyAtIndex() } else { //s1 > s2 = .highlowcontainer.advanceUntil(, ) if == { break } = .highlowcontainer.getKeyAtIndex() } } } else { break } } if == { .highlowcontainer.appendCopyMany(.highlowcontainer, , ) } return } // AddMany add all of the values in dat func ( *Bitmap) ( []uint32) { if len() == 0 { return } := [0] , := .addwithptr() for , := range [1:] { if highbits() == highbits() { = .iaddReturnMinimized(lowbits()) .highlowcontainer.setContainerAtIndex(, ) } else { , = .addwithptr() } = } } // BitmapOf generates a new bitmap filled with the specified integers func ( ...uint32) *Bitmap { := NewBitmap() .AddMany() return } // Flip negates the bits in the given range (i.e., [rangeStart,rangeEnd)), any integer present in this range and in the bitmap is removed, // and any integer present in the range and not in the bitmap is added. // The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range // while uint64(0x100000000) cannot be represented as a 32-bit value. func ( *Bitmap) (, uint64) { if > MaxUint32+1 { panic("rangeEnd > MaxUint32+1") } if > MaxUint32+1 { panic("rangeStart > MaxUint32+1") } if >= { return } := uint32(highbits(uint32())) := uint32(lowbits(uint32())) := uint32(highbits(uint32( - 1))) := uint32(lowbits(uint32( - 1))) var uint32 = maxLowBit for := ; <= ; ++ { var uint32 if == { = uint32() } := if == { = uint32() } := .highlowcontainer.getIndex(uint16()) if >= 0 { := .highlowcontainer.getWritableContainerAtIndex().inot(int(), int()+1) if !.isEmpty() { .highlowcontainer.setContainerAtIndex(, ) } else { .highlowcontainer.removeAtIndex() } } else { // *think* the range of ones must never be // empty. .highlowcontainer.insertNewKeyValueAt(--1, uint16(), rangeOfOnes(int(), int())) } } } // FlipInt calls Flip after casting the parameters (convenience method) func ( *Bitmap) (, int) { .Flip(uint64(), uint64()) } // AddRange adds the integers in [rangeStart, rangeEnd) to the bitmap. // The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range // while uint64(0x100000000) cannot be represented as a 32-bit value. func ( *Bitmap) (, uint64) { if >= { return } if -1 > MaxUint32 { panic("rangeEnd-1 > MaxUint32") } := uint32(highbits(uint32())) := uint32(lowbits(uint32())) := uint32(highbits(uint32( - 1))) := uint32(lowbits(uint32( - 1))) var uint32 = maxLowBit for := ; <= ; ++ { := uint32(0) if == { = } := if == { = } := .highlowcontainer.getIndex(uint16()) if >= 0 { := .highlowcontainer.getWritableContainerAtIndex().iaddRange(int(), int()+1) .highlowcontainer.setContainerAtIndex(, ) } else { // *think* the range of ones must never be // empty. .highlowcontainer.insertNewKeyValueAt(--1, uint16(), rangeOfOnes(int(), int())) } } } // RemoveRange removes the integers in [rangeStart, rangeEnd) from the bitmap. // The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range // while uint64(0x100000000) cannot be represented as a 32-bit value. func ( *Bitmap) (, uint64) { if >= { return } if -1 > MaxUint32 { // logically, we should assume that the user wants to // remove all values from rangeStart to infinity // see https://github.com/RoaringBitmap/roaring/issues/141 = uint64(0x100000000) } := uint32(highbits(uint32())) := uint32(lowbits(uint32())) := uint32(highbits(uint32( - 1))) := uint32(lowbits(uint32( - 1))) var uint32 = maxLowBit if == { := .highlowcontainer.getIndex(uint16()) if < 0 { return } := .highlowcontainer.getWritableContainerAtIndex().iremoveRange(int(), int(+1)) if !.isEmpty() { .highlowcontainer.setContainerAtIndex(, ) } else { .highlowcontainer.removeAtIndex() } return } := .highlowcontainer.getIndex(uint16()) := .highlowcontainer.getIndex(uint16()) if >= 0 { if != 0 { := .highlowcontainer.getWritableContainerAtIndex().iremoveRange(int(), int(+1)) if !.isEmpty() { .highlowcontainer.setContainerAtIndex(, ) ++ } } } else { = - - 1 } if >= 0 { if != { := .highlowcontainer.getWritableContainerAtIndex().iremoveRange(int(0), int(+1)) if !.isEmpty() { .highlowcontainer.setContainerAtIndex(, ) } else { ++ } } else { ++ } } else { = - - 1 } .highlowcontainer.removeIndexRange(, ) } // Flip negates the bits in the given range (i.e., [rangeStart,rangeEnd)), any integer present in this range and in the bitmap is removed, // and any integer present in the range and not in the bitmap is added, a new bitmap is returned leaving // the current bitmap unchanged. // The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range // while uint64(0x100000000) cannot be represented as a 32-bit value. func ( *Bitmap, , uint64) *Bitmap { if >= { return .Clone() } if > MaxUint32 { panic("rangeStart > MaxUint32") } if -1 > MaxUint32 { panic("rangeEnd-1 > MaxUint32") } := NewBitmap() := uint32(highbits(uint32())) := uint32(lowbits(uint32())) := uint32(highbits(uint32( - 1))) := uint32(lowbits(uint32( - 1))) // copy the containers before the active area .highlowcontainer.appendCopiesUntil(.highlowcontainer, uint16()) var uint32 = maxLowBit for := ; <= ; ++ { var uint32 if == { = uint32() } := if == { = uint32() } := .highlowcontainer.getIndex(uint16()) := .highlowcontainer.getIndex(uint16()) if >= 0 { := .highlowcontainer.getContainerAtIndex().not(int(), int()+1) if !.isEmpty() { .highlowcontainer.insertNewKeyValueAt(--1, uint16(), ) } } else { // *think* the range of ones must never be // empty. .highlowcontainer.insertNewKeyValueAt(--1, uint16(), rangeOfOnes(int(), int())) } } // copy the containers after the active area. .highlowcontainer.appendCopiesAfter(.highlowcontainer, uint16()) return } // SetCopyOnWrite sets this bitmap to use copy-on-write so that copies are fast and memory conscious // if the parameter is true, otherwise we leave the default where hard copies are made // (copy-on-write requires extra care in a threaded context). // Calling SetCopyOnWrite(true) on a bitmap created with FromBuffer is unsafe. func ( *Bitmap) ( bool) { .highlowcontainer.copyOnWrite = } // GetCopyOnWrite gets this bitmap's copy-on-write property func ( *Bitmap) () ( bool) { return .highlowcontainer.copyOnWrite } // CloneCopyOnWriteContainers clones all containers which have // needCopyOnWrite set to true. // This can be used to make sure it is safe to munmap a []byte // that the roaring array may still have a reference to, after // calling FromBuffer. // More generally this function is useful if you call FromBuffer // to construct a bitmap with a backing array buf // and then later discard the buf array. Note that you should call // CloneCopyOnWriteContainers on all bitmaps that were derived // from the 'FromBuffer' bitmap since they map have dependencies // on the buf array as well. func ( *Bitmap) () { .highlowcontainer.cloneCopyOnWriteContainers() } // FlipInt calls Flip after casting the parameters (convenience method) func ( *Bitmap, , int) *Bitmap { return Flip(, uint64(), uint64()) } // Statistics provides details on the container types in use. type Statistics struct { Cardinality uint64 Containers uint64 ArrayContainers uint64 ArrayContainerBytes uint64 ArrayContainerValues uint64 BitmapContainers uint64 BitmapContainerBytes uint64 BitmapContainerValues uint64 RunContainers uint64 RunContainerBytes uint64 RunContainerValues uint64 } // Stats returns details on container type usage in a Statistics struct. func ( *Bitmap) () Statistics { := Statistics{} .Containers = uint64(len(.highlowcontainer.containers)) for , := range .highlowcontainer.containers { .Cardinality += uint64(.getCardinality()) switch .(type) { case *arrayContainer: .ArrayContainers++ .ArrayContainerBytes += uint64(.getSizeInBytes()) .ArrayContainerValues += uint64(.getCardinality()) case *bitmapContainer: .BitmapContainers++ .BitmapContainerBytes += uint64(.getSizeInBytes()) .BitmapContainerValues += uint64(.getCardinality()) case *runContainer16: .RunContainers++ .RunContainerBytes += uint64(.getSizeInBytes()) .RunContainerValues += uint64(.getCardinality()) } } return }